import re
import pandas as pd
# Load the novel into a one-column DataFrame named 'txt'.
# NOTE(review): sep='aaa' looks like a separator chosen because it should
# never occur in the text, so that each line stays whole in a single
# column -- confirm against the data file.
# engine='python' is needed because pandas treats separators longer than one
# character as regular expressions, which only the Python parser supports.
raw = pd.read_csv(
    "金庸-射雕英雄传txt精校版.txt",
    sep='aaa',
    names=['txt'],
    encoding="GBK",
    engine='python',
)

def head(tmpstr):
    """Return the first character of *tmpstr*, or an empty result if it is empty."""
    leading = tmpstr[0:1]
    return leading
def mid(tmpstr):
    """Return the offset of the first ``"回 "`` in *tmpstr*, or ``-1`` if absent."""
    before, marker, _after = tmpstr.partition("回 ")
    # partition() yields an empty separator when the marker was not found;
    # otherwise the length of the text before it is the marker's offset.
    if not marker:
        return -1
    return len(before)

# Derive three helper columns from every line of text:
#   'head' - head(line), 'mid' - mid(line), 'len' - len(line).
for column_name, extractor in (('head', head), ('mid', mid), ('len', len)):
    raw[column_name] = raw['txt'].apply(extractor)

# Label every row of `raw` with a chapter number in the new 'chap' column.
#
# A row counts as a chapter heading when its 'head' value is "第", its 'mid'
# value is greater than 0 and its 'len' value is below 30; each heading
# increments the running counter.  Once the counter has reached 40, a row
# whose text is exactly the appendix title resets the counter to 0, so the
# rows from there on are labelled 0 until another heading-like row appears.
#
# The labels are collected in a list and assigned as a whole column, which
# works for any index, yields an integer column, and creates 'chap' even
# when `raw` has no rows.
chapnum = 0
chapter_numbers = []
for first_char, marker_pos, length, text in zip(
        raw['head'], raw['mid'], raw['len'], raw['txt']):
    if first_char == "第" and marker_pos > 0 and length < 30:
        chapnum += 1
    if chapnum >= 40 and text == "附录一：成吉思汗家族":
        chapnum = 0
    chapter_numbers.append(chapnum)
raw['chap'] = chapter_numbers


# Remove the helper columns from `raw`.
for helper_column in ('head', 'mid', 'len'):
    del raw[helper_column]

# Take an independent copy of the rows labelled as chapter 7, renumber them
# from 0, and expose that position as the 'paraidx' column.
tmpchap = raw.loc[raw['chap'] == 7].copy()
tmpchap = tmpchap.reset_index(drop=True)
tmpchap['paraidx'] = tmpchap.index

# Split each paragraph of `tmpchap` into sentences and print the result as a
# list with one inner list of sentences per paragraph, in row order.
#
# A sentence is the shortest run of characters that ends in one of the
# delimiters inside the character class below.
# NOTE: text after the last delimiter of a paragraph is not matched by the
# pattern and is therefore left out of the result.
sentence_pattern = re.compile(r'(.*?[。！？ ；：])')

# Iterate the text column directly instead of filtering the frame once per
# row, and avoid naming the result `list`, which would hide the builtin.
paragraph_sentences = [
    sentence_pattern.findall(paragraph) for paragraph in tmpchap['txt']
]
print(paragraph_sentences)